Source code for nlp_architect.models.intent_extraction

# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

from nlp_architect.nn.tensorflow.python.keras.layers.crf import CRF
from nlp_architect.nn.tensorflow.python.keras.utils import load_model, save_model


class IntentExtractionModel(object):
    """
    Intent Extraction model base class (using tf.keras)
    """

    def __init__(self):
        self.model = None

    def fit(self, x, y, epochs=1, batch_size=1, callbacks=None, validation=None):
        """
        Train a model given input samples and target labels.

        Args:
            x: input samples
            y: input sample labels
            epochs (:obj:`int`, optional): number of epochs to train
            batch_size (:obj:`int`, optional): batch size
            callbacks (:obj:`Callback`, optional): Keras compatible callbacks
            validation (:obj:`list` of :obj:`numpy.ndarray`, optional): optional validation data
                to be evaluated when training
        """
        assert self.model, 'Model was not initialized'
        self.model.fit(x, y, epochs=epochs, batch_size=batch_size, shuffle=True,
                       validation_data=validation,
                       callbacks=callbacks)

    def predict(self, x, batch_size=1):
        """
        Get the prediction of the model on given input

        Args:
            x: samples to run through the model
            batch_size (:obj:`int`, optional): batch size

        Returns:
            numpy.ndarray: predicted values by the model
        """
        assert self.model, 'Model was not initialized'
        return self.model.predict(x, batch_size=batch_size)

    def save(self, path, exclude=None):
        """
        Save model to path

        Args:
            path (str): path to save model
            exclude (list, optional): a list of object fields to exclude when saving
        """
        assert self.model, 'Model was not initialized'
        topology = {k: v for k, v in self.__dict__.items()}
        topology.pop('model')
        if exclude and isinstance(exclude, list):
            for x in exclude:
                topology.pop(x)
        save_model(self.model, topology=topology, filepath=path)

    def load(self, path):
        """
        Load a trained model

        Args:
            path (str): path to model file
        """
        load_model(path, self)

    @property
    def input_shape(self):
        """:obj:`tuple`: Get input shape"""
        return self.model.layers[0].input_shape

    @staticmethod
    def _create_input_embed(sentence_len, is_extern_emb, token_emb_size, vocab_size):
        if is_extern_emb:
            in_layer = e_layer = tf.keras.layers.Input(shape=(sentence_len, token_emb_size,),
                                                       dtype='float32', name='tokens_input')
        else:
            in_layer = tf.keras.layers.Input(shape=(sentence_len,),
                                             dtype='int32', name='tokens_input')
            e_layer = tf.keras.layers.Embedding(vocab_size, token_emb_size,
                                                input_length=sentence_len,
                                                name='embedding_layer')(in_layer)
        return in_layer, e_layer

    def load_embedding_weights(self, weights):
        """
        Load word embedding weights into the model embedding layer

        Args:
            weights (numpy.ndarray): 2D matrix of word weights
        """
        assert self.model is not None, 'Cannot assign weights, apply build() before trying to ' \
                                       'load embedding weights'
        emb_layer = self.model.get_layer(name='word_embedding')
        assert emb_layer.output_dim == weights.shape[1], 'embedding vectors shape mismatch'
        emb_layer.set_weights([weights])

class MultiTaskIntentModel(IntentExtractionModel):
    """
    Multi-Task Intent and Slot tagging model (using tf.keras)

    Args:
        use_cudnn (bool, optional): use GPU based model (CuDNN cells)
    """

    def __init__(self, use_cudnn=False):
        super().__init__()
        self.model = None
        self.word_length = None
        self.num_labels = None
        self.num_intent_labels = None
        self.word_vocab_size = None
        self.char_vocab_size = None
        self.word_emb_dims = None
        self.char_emb_dims = None
        self.char_lstm_dims = None
        self.tagger_lstm_dims = None
        self.dropout = None
        self.use_cudnn = use_cudnn

    def build(self,
              word_length,
              num_labels,
              num_intent_labels,
              word_vocab_size,
              char_vocab_size,
              word_emb_dims=100,
              char_emb_dims=30,
              char_lstm_dims=30,
              tagger_lstm_dims=100,
              dropout=0.2):
        """
        Build a model

        Args:
            word_length (int): max word length (in characters)
            num_labels (int): number of slot labels
            num_intent_labels (int): number of intent classes
            word_vocab_size (int): word vocabulary size
            char_vocab_size (int): character vocabulary size
            word_emb_dims (int, optional): word embedding dimensions
            char_emb_dims (int, optional): character embedding dimensions
            char_lstm_dims (int, optional): character feature LSTM hidden size
            tagger_lstm_dims (int, optional): tagger LSTM hidden size
            dropout (float, optional): dropout rate
        """
        self.word_length = word_length
        self.num_labels = num_labels
        self.num_intent_labels = num_intent_labels
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size
        self.word_emb_dims = word_emb_dims
        self.char_emb_dims = char_emb_dims
        self.char_lstm_dims = char_lstm_dims
        self.tagger_lstm_dims = tagger_lstm_dims
        self.dropout = dropout

        words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
        embedding_layer = tf.keras.layers.Embedding(self.word_vocab_size, self.word_emb_dims,
                                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        word_embeddings = tf.keras.layers.Dropout(self.dropout)(word_embeddings)

        # create word character input and embeddings layer
        word_chars_input = tf.keras.layers.Input(shape=(None, self.word_length),
                                                 name='word_chars_input')
        char_embedding_layer = tf.keras.layers.Embedding(self.char_vocab_size, self.char_emb_dims,
                                                         input_length=self.word_length,
                                                         name='char_embedding')
        # apply embedding to each word
        char_embeddings = char_embedding_layer(word_chars_input)
        # feed dense char vectors into BiLSTM
        char_embeddings = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Bidirectional(self._rnn_cell(self.char_lstm_dims)))(char_embeddings)
        char_embeddings = tf.keras.layers.Dropout(self.dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True, return_state=True))
        first_lstm_out = first_bilstm_layer(word_embeddings)

        lstm_y_sequence = first_lstm_out[:1][0]  # save y states of the LSTM layer
        states = first_lstm_out[1:]
        hf, _, hb, _ = states  # extract last hidden states
        h_state = tf.keras.layers.concatenate([hf, hb], axis=-1)
        intents = tf.keras.layers.Dense(self.num_intent_labels,
                                        activation='softmax',
                                        name='intent_classifier_output')(h_state)

        # create the 2nd feature vectors
        combined_features = tf.keras.layers.concatenate([lstm_y_sequence, char_embeddings],
                                                        axis=-1)

        # 2nd BiLSTM layer for label classification
        second_bilstm_layer = tf.keras.layers.Bidirectional(
            self._rnn_cell(self.tagger_lstm_dims, return_sequences=True))(combined_features)
        second_bilstm_layer = tf.keras.layers.Dropout(self.dropout)(second_bilstm_layer)
        bilstm_out = tf.keras.layers.Dense(self.num_labels)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF
        with tf.device('/cpu:0'):
            crf = CRF(self.num_labels, name='intent_slot_crf')
            labels = crf(bilstm_out)

        # compile the model
        model = tf.keras.Model(inputs=[words_input, word_chars_input],
                               outputs=[intents, labels])

        # define losses and metrics
        loss_f = {'intent_classifier_output': 'categorical_crossentropy',
                  'intent_slot_crf': crf.loss}
        metrics = {'intent_classifier_output': 'categorical_accuracy',
                   'intent_slot_crf': crf.viterbi_accuracy}

        model.compile(loss=loss_f,
                      optimizer=tf.train.AdamOptimizer(),
                      metrics=metrics)
        self.model = model

    def _rnn_cell(self, units, **kwargs):
        if self.use_cudnn:
            rnn_cell = tf.keras.layers.CuDNNLSTM(units, **kwargs)
        else:
            rnn_cell = tf.keras.layers.LSTM(units, **kwargs)
        return rnn_cell

    # pylint: disable=arguments-differ
    def save(self, path):
        """
        Save model to path

        Args:
            path (str): path to save model
        """
        super().save(path, ['use_cudnn'])
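
Below is a minimal usage sketch for MultiTaskIntentModel, not taken from the library's own examples: every size, array, and value (vocabulary sizes, sequence lengths, sample counts) is a random hypothetical placeholder, and the intent and slot targets are assumed to be one-hot encoded to match the model's softmax and CRF outputs.

import numpy as np
import tensorflow as tf

# Hypothetical sizes -- replace with values derived from your dataset.
n_samples, sentence_len, word_len = 8, 30, 12
n_slots, n_intents = 10, 7
word_vocab, char_vocab = 5000, 80

m = MultiTaskIntentModel(use_cudnn=False)
m.build(word_length=word_len, num_labels=n_slots, num_intent_labels=n_intents,
        word_vocab_size=word_vocab, char_vocab_size=char_vocab)

# Optionally load pre-trained word vectors (random placeholder matrix here);
# the shape must be (word_vocab_size, word_emb_dims).
m.load_embedding_weights(np.random.rand(word_vocab, 100).astype('float32'))

# Random token/character ids and one-hot targets, just to exercise the API.
words = np.random.randint(1, word_vocab, size=(n_samples, sentence_len))
chars = np.random.randint(1, char_vocab, size=(n_samples, sentence_len, word_len))
intent_y = tf.keras.utils.to_categorical(
    np.random.randint(n_intents, size=n_samples), n_intents)
slot_y = tf.keras.utils.to_categorical(
    np.random.randint(n_slots, size=(n_samples, sentence_len)), n_slots)

m.fit([words, chars], [intent_y, slot_y], epochs=1, batch_size=4)
intent_pred, slot_pred = m.predict([words, chars], batch_size=4)

The two predictions come back in the order the outputs were declared when the model was built: the intent distribution first, then the per-token slot scores.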

class Seq2SeqIntentModel(IntentExtractionModel):
    """
    Encoder Decoder Deep LSTM Tagger Model (using tf.keras)
    """

    def __init__(self):
        super().__init__()
        self.model = None
        self.vocab_size = None
        self.tag_labels = None
        self.token_emb_size = None
        self.encoder_depth = None
        self.decoder_depth = None
        self.lstm_hidden_size = None
        self.encoder_dropout = None
        self.decoder_dropout = None

    def build(self,
              vocab_size,
              tag_labels,
              token_emb_size=100,
              encoder_depth=1,
              decoder_depth=1,
              lstm_hidden_size=100,
              encoder_dropout=0.5,
              decoder_dropout=0.5):
        """
        Build the model

        Args:
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            token_emb_size (int, optional): token embedding vector size
            encoder_depth (int, optional): number of encoder LSTM layers
            decoder_depth (int, optional): number of decoder LSTM layers
            lstm_hidden_size (int, optional): LSTM layers hidden size
            encoder_dropout (float, optional): encoder dropout
            decoder_dropout (float, optional): decoder dropout
        """
        self.vocab_size = vocab_size
        self.tag_labels = tag_labels
        self.token_emb_size = token_emb_size
        self.encoder_depth = encoder_depth
        self.decoder_depth = decoder_depth
        self.lstm_hidden_size = lstm_hidden_size
        self.encoder_dropout = encoder_dropout
        self.decoder_dropout = decoder_dropout

        words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
        emb_layer = tf.keras.layers.Embedding(self.vocab_size, self.token_emb_size,
                                              name='word_embedding')
        benc_in = emb_layer(words_input)

        assert self.encoder_depth > 0, 'Encoder depth must be > 0'
        for i in range(self.encoder_depth):
            bencoder = tf.keras.layers.LSTM(self.lstm_hidden_size, return_sequences=True,
                                            return_state=True, go_backwards=True,
                                            dropout=self.encoder_dropout,
                                            name='encoder_blstm_{}'.format(i))(benc_in)
            benc_in = bencoder[0]
        b_states = bencoder[1:]
        benc_h, bene_c = b_states

        decoder_inputs = benc_in
        assert self.decoder_depth > 0, 'Decoder depth must be > 0'
        for i in range(self.decoder_depth):
            decoder = tf.keras.layers.LSTM(
                self.lstm_hidden_size, return_sequences=True,
                name='decoder_lstm_{}'.format(i))(decoder_inputs,
                                                  initial_state=[benc_h, bene_c])
            decoder_inputs = decoder
        decoder_outputs = tf.keras.layers.Dropout(self.decoder_dropout)(decoder)
        decoder_predictions = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(self.tag_labels, activation='softmax'),
            name='decoder_classifier')(decoder_outputs)

        self.model = tf.keras.Model(words_input, decoder_predictions)
        self.model.compile(optimizer=tf.train.AdamOptimizer(),
                           loss='categorical_crossentropy',
                           metrics=['categorical_accuracy'])
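
A matching usage sketch for Seq2SeqIntentModel, again on random placeholder data with hypothetical sizes; the per-token tag targets are one-hot encoded to fit the categorical cross-entropy loss used above.

import numpy as np
import tensorflow as tf

# Hypothetical sizes -- replace with values derived from your dataset.
n_samples, sentence_len = 8, 30
vocab, n_tags = 5000, 12

s2s = Seq2SeqIntentModel()
s2s.build(vocab_size=vocab, tag_labels=n_tags,
          encoder_depth=1, decoder_depth=1, lstm_hidden_size=100)

# Random token ids and one-hot tag targets, just to exercise the API.
tokens = np.random.randint(1, vocab, size=(n_samples, sentence_len))
tag_y = tf.keras.utils.to_categorical(
    np.random.randint(n_tags, size=(n_samples, sentence_len)), n_tags)

s2s.fit(tokens, tag_y, epochs=1, batch_size=4)
tag_pred = s2s.predict(tokens, batch_size=4)  # (n_samples, sentence_len, n_tags) softmax scores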